from selenium import webdriver
from lxml import etree
import urllib.request


def initDriver():
    """Create and return a Chrome WebDriver configured for the scraper.

    The browser is started detached (it stays open after the script ends)
    and with Chrome/ChromeDriver console logging suppressed.

    Returns:
        The ``webdriver.Chrome`` instance built from these options.
    """
    # Build the Chrome options object.
    chromeOptions = webdriver.ChromeOptions()
    # To make Chrome save downloads into a specific directory, set the
    # 'download.default_directory' pref, e.g.:
    #   chromeOptions.add_experimental_option(
    #       'prefs', {'download.default_directory': r'<absolute path>'})
    chromeOptions.add_experimental_option("detach", True)
    # The switch must start with two ASCII hyphens; the previous value began
    # with an en dash (U+2013), so Chrome did not recognise it.
    chromeOptions.add_argument('--log-level=3')
    chromeOptions.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(options=chromeOptions)
    return driver


# Module-level counter of downloaded archives. It is incremented from inside
# getFile() and is never reset, so it accumulates across repeated calls.
count = 0


def getFile(driver):
    """Download the budget archive of every department listed on the index page.

    Loads the 2021 budget index page in ``driver``, collects the department
    page links, opens each department page and saves the first linked file
    whose link text contains the word for "budget" into the ``saved files``
    directory (created if missing). A department whose page has no such link,
    or whose download fails, is reported and skipped.

    Args:
        driver: A Selenium WebDriver used to load the pages.

    Returns:
        The module-level ``count`` after this run. It is incremented once per
        successful download and is not reset between calls.
    """
    # Function-scope imports keep this block self-contained.
    import os
    from urllib.parse import urljoin, urlsplit

    index_url = 'http://www.beijing.gov.cn/gongkai/caizheng/czzt/2021ys/index.html'
    save_dir = 'saved files'
    # urlretrieve does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    # Collect the department page links.
    def search_department_list():
        driver.get(index_url)
        html = etree.HTML(driver.page_source)
        return html.xpath("//ul[contains(@class,'on')]//a[contains(@href,'./202102')]/@href")

    # Download one department's archive.
    def downloadRar(url):
        global count

        driver.get(url)
        html = etree.HTML(driver.page_source)
        links = html.xpath("//li/a[contains(text(),'预算')]/@href")
        if not links:
            # No budget link on this page: report and move on.
            print('download failed', url)
            return

        # Resolve the href the way a browser would, relative to the page it
        # was found on, instead of assuming a fixed base directory.
        final_res = urljoin(url, links[0])
        file_name = os.path.basename(urlsplit(final_res).path)
        if not file_name:
            print('download failed', url)
            return

        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'),
                             ('Referer', final_res)]
        urllib.request.install_opener(opener)
        try:
            urllib.request.urlretrieve(final_res, os.path.join(save_dir, file_name))
        except OSError as err:
            # URLError/HTTPError/ContentTooShortError and local file errors
            # are all OSError subclasses; one bad archive must not stop the run.
            print('download failed', url, err)
            return
        count = count + 1

    for departmentUrl in search_department_list():
        downloadRar(urljoin(index_url, departmentUrl))
        print("downloading...")

    return count


def download():
    """Start a Chrome session and run the archive download with it.

    Returns:
        The value returned by ``getFile`` for the newly created driver.
    """
    browser = initDriver()
    downloaded = getFile(browser)
    return downloaded
